# Replication Code: Can (Thin) Populism be manipulated without manipulating Host Ideology? Evidence from a conjoint validation approach
# Script 03: Data Cleaning, Recoding, and Analysis of UK Wide Data

# Ensure packages from script_01 are loaded and Prolific_UK_Raw.csv is in working directory


## Load Raw UK Data

# The raw export is read in two passes: the first row supplies the variable
# names, and the responses are read from the fourth row onwards so that column
# types are guessed from the answers only. Rows 2-3 are skipped as non-data
# header rows (presumably survey-platform metadata -- confirm against the raw
# file).
filename_UK <- "Prolific_UK_Raw.csv"
headers_UK <- read_csv(filename_UK, col_names = FALSE, n_max = 1)
data_wide_UK <- read_csv(filename_UK, skip = 3, col_names = FALSE)

# Flatten the one-row header table into a plain character vector before using
# it as column names.
colnames(data_wide_UK) <- as.character(unlist(headers_UK, use.names = FALSE))

# Overview of variables
names(data_wide_UK)

## Data cleaning and examination

# Remove Respondents who did not start Survey due to missing GDPR consent
# NOTE(review): rows are KEPT when GDPR_consent is NA. This presumes the column
# is only filled in for respondents who were screened out at the consent
# stage -- confirm against the survey export.
table(data_wide_UK$GDPR_consent)
data_wide_UK <- data_wide_UK[is.na(data_wide_UK[["GDPR_consent"]]), ]

# Remove Respondents who did not complete survey
# %in% never returns NA, so a missing Finished flag is treated as "not
# finished" instead of producing an all-NA row in the subset.
table(data_wide_UK$Finished)
data_wide_UK <- data_wide_UK[data_wide_UK$Finished %in% 1, ]

# Information on Survey Duration
summary(data_wide_UK$`Duration (in seconds)`)

# Mock Vignette Attention Check
# Only response option 4 on the mock vignette item is scored as attentive (1);
# options 1-3 and 5-6 are scored 0.
table(data_wide_UK[["attent_vignette_out"]])
data_wide_UK <- data_wide_UK %>%
  mutate(attentive = car::recode(attent_vignette_out, "1:3=0; 4=1; 5:6=0"))
table(data_wide_UK[["attentive"]])


## Variable recodes 

# Helper: turn a coded survey item into a labelled factor.
#   codes  - vector of raw answer codes
#   labels - character vector of category labels, in code order
#   item   - item name, used only in the warning message
# Labels are attached by position to the sorted observed codes, so they are
# only trustworthy when every answer category occurs in the data. A warning is
# raised when fewer distinct codes than labels are observed, because the labels
# may then be shifted onto the wrong categories. More codes than labels is an
# error raised by `levels<-` itself.
label_codes_UK <- function(codes, labels, item) {
  out <- as.factor(codes)
  if (nlevels(out) < length(labels)) {
    warning(
      "'", item, "': ", nlevels(out), " observed codes but ", length(labels),
      " labels; positional labels may be misaligned.",
      call. = FALSE
    )
  }
  levels(out) <- labels
  out
}

# Sex
table(data_wide_UK$gender)
data_wide_UK$gender <- label_codes_UK(
  data_wide_UK$gender,
  c("Male", "Female", "Other"),
  "gender"
)

# Race
table(data_wide_UK$race)
data_wide_UK$race <- label_codes_UK(
  data_wide_UK$race,
  c("White", "Black", "Hispanic", "Asian", "Other"),
  "race"
)

# Age
# Entries above 1000 are treated as birth years and converted to an age
# relative to the reference year; smaller entries are kept as given
# (presumably respondents who typed their age directly -- confirm).
table(data_wide_UK$yearborn)
reference_year_UK <- 2025
data_wide_UK$age <- ifelse(
  data_wide_UK$yearborn > 1000,
  reference_year_UK - data_wide_UK$yearborn,
  data_wide_UK$yearborn
)
table(data_wide_UK$age)

# Ages outside 18-100 match no bracket and are left as NA.
data_wide_UK <- data_wide_UK %>%
  mutate(
    agegroup = case_when(
      age >= 65 & age <= 100 ~ '65-100',
      age >= 50 & age <= 64 ~ '50-64',
      age >= 36 & age <= 49 ~ '36-49',
      age >= 26 & age <= 35 ~ '26-35',
      age >= 18 & age <= 25 ~ '18-25'
    )
  )

# Education
table(data_wide_UK$educ)
data_wide_UK$educ <- label_codes_UK(
  data_wide_UK$educ,
  c(
    "Primary school",
    "Secondary school up to 16 years",
    "Higher or secondary or further education (A-levels, BTEC, etc.)",
    "College or university",
    "Post-graduate degree"
  ),
  "educ"
)

# Income
table(data_wide_UK$income)
data_wide_UK$income <- label_codes_UK(
  data_wide_UK$income,
  c(
    "Under £20,000",
    "£20,001 – £50,000",
    "£50,001 – £100,000",
    "Above £100,000",
    "Prefer not to answer"
  ),
  "income"
)

# Partisanship
table(data_wide_UK$pid0)
data_wide_UK$`Respondent Partisanship` <- label_codes_UK(
  data_wide_UK$pid0,
  c(
    "Conservative", "Labour", "Liberal Democrat", "SNP", "Plaid Cymru",
    "Reform UK", "UKIP", "Green Party", "Other", "None"
  ),
  "pid0"
)
table(data_wide_UK$`Respondent Partisanship`)

# Respondent-level partisanship lookup (Response.ID, PID) written to disk for
# merging in later scripts.
# Note: Only includes parties with n>50 respondents
partisanship_df_UK <- data.frame(
  Response.ID = data_wide_UK$ResponseId,
  PID = data_wide_UK$`Respondent Partisanship`
) %>%
  dplyr::filter(
    PID %in% c(
      "Conservative", "Labour", "Liberal Democrat",
      "Reform UK", "Green Party", "None"
    )
  )
write.csv(partisanship_df_UK, file = "partisanship_df_UK.csv", row.names = FALSE)


# Populist Attitudes (See Appendix K for details)

# Goal: every item ends up scored so that higher values = more populist.
# Inspect the raw distributions first; the second item of each triplet is
# flagged as reverse coded.

table(data_wide_UK[["ppl1"]])
table(data_wide_UK[["ppl2"]]) # reverse coded
table(data_wide_UK[["ppl3"]])
table(data_wide_UK[["ant1"]])
table(data_wide_UK[["ant2"]]) # reverse coded
table(data_wide_UK[["ant3"]])
table(data_wide_UK[["man1"]])
table(data_wide_UK[["man2"]]) # reverse coded
table(data_wide_UK[["man3"]])

# Build Pop1-Pop9: the reverse-coded items (ppl2, ant2, man2) are carried over
# unchanged, all other items have their 1-5 scale flipped.

data_wide_UK <- data_wide_UK %>%
  mutate(
    Pop1 = car::recode(ppl1, "1=5; 2=4; 3=3; 4=2; 5=1"),
    Pop2 = ppl2,
    Pop3 = car::recode(ppl3, "1=5; 2=4; 3=3; 4=2; 5=1"),
    Pop4 = car::recode(ant1, "1=5; 2=4; 3=3; 4=2; 5=1"),
    Pop5 = ant2,
    Pop6 = car::recode(ant3, "1=5; 2=4; 3=3; 4=2; 5=1"),
    Pop7 = car::recode(man1, "1=5; 2=4; 3=3; 4=2; 5=1"),
    Pop8 = man2,
    Pop9 = car::recode(man3, "1=5; 2=4; 3=3; 4=2; 5=1")
  )

# Prepare Data for Factor Analysis
# One row per respondent: the id plus the nine recoded populism items.
thin_pop_UK <- data_wide_UK %>%
  select(
    "ResponseId", "Pop1", "Pop2", "Pop3", "Pop4", "Pop5",
    "Pop6", "Pop7", "Pop8", "Pop9"
  ) %>%
  distinct(ResponseId, .keep_all = TRUE)

# Run Initial EFA model (single factor, polychoric correlations, all 9 items)
all_items_UK <- paste0("Pop", 1:9)
fa_thin_pop_UK <- fa(
  thin_pop_UK[, all_items_UK],
  nfactors = 1, scores = "regression", cor = "poly"
)

# Examine factor loadings
fa_thin_pop_UK$loadings

# Run EFA model (threshold 0.45)
# Items are selected by name, not by column position, so the model does not
# depend on the column order of thin_pop_UK.
# NOTE(review): Pop3-Pop7 are presumably the items whose loading passed the
# 0.45 threshold -- confirm against the loadings printed above.
retained_items_UK <- c("Pop3", "Pop4", "Pop5", "Pop6", "Pop7")
fa_thin_pop_UK_threshold <- fa(
  thin_pop_UK[, retained_items_UK],
  nfactors = 1, scores = "regression", cor = "poly"
)

# Save factor scores
# as.numeric() flattens the scores of the single factor into a plain vector,
# so the joined `scores` column is an ordinary numeric column.
fa_scores_threshold_UK <- tibble::tibble(
  ResponseId = thin_pop_UK$ResponseId,
  scores = as.numeric(fa_thin_pop_UK_threshold$scores)
)

# Append factor scores
data_wide_UK <- left_join(data_wide_UK, fa_scores_threshold_UK, by = "ResponseId")


# Create Populist Attitudes Mean Split
# The cut-off is the sample mean of the factor scores, computed from the data
# so that it stays correct if the sample or the factor model changes.
# Respondents at or above the mean are 'Populist', those below it are
# 'Non-populist'; a missing score stays NA.
scores_UK <- as.numeric(data_wide_UK$scores)
score_mean_UK <- mean(scores_UK, na.rm = TRUE)
score_mean_UK
data_wide_UK$populist_factor <- factor(
  ifelse(scores_UK >= score_mean_UK, "Populist", "Non-populist")
)

# Create Dataset with Response.ID and Populist Attitudes for merging
populism_df_UK <- data.frame(
  Response.ID = data_wide_UK$ResponseId,
  Populism_Factor = data_wide_UK$populist_factor
)
write.csv(populism_df_UK, "populism_df_UK.csv", row.names = FALSE)


## Prepare Demographic Tables for Appendix A Tables

# Pick the six demographic variables and give them their display names in a
# single step; the column order is the order they are listed in.
demographics_UK <- data_wide_UK %>%
  select(
    Sex = gender,
    `Race or Ethnicity` = race,
    Age = agegroup,
    Education = educ,
    `Household Income` = income,
    Partisanship = `Respondent Partisanship`
  )

# Table is created in script_06 or by uncommenting the line below
# datasummary_skim(demographics_UK, output = 'tableA3.html')







